In this multi-part lecture we will be working through an example of building out a nice visualization.
Now for a quick overview of ggplot2!
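ggplot2 has several advantages: plots are specified at a high level of abstraction, the layered design is very flexible, a theme system handles the final polish of a plot's appearance, and the package is a mature, complete graphics system with a large user community.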
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of visual marks that represent data points, and a coordinate system. To display data values, map variables in the data set to aesthetic properties of the geom like size, color, and x and y locations.
ggplot2 is based on the grammar of graphics, which sets a paradigm for building a data visualization in layers: data, aesthetics, geometries, facets, statistics, coordinates, and theme.

A handy reference when using ggplot2 is the official ggplot2 cheat sheet published by Posit (formerly RStudio).
# Session setup ----
# Turn off warning output for everything that follows (warn = -1 hides all
# warnings, not only the ones raised by the plots below)
options(warn = -1)
# Plotting library used throughout
library("ggplot2")
# repr supplies the repr.plot.* options used to adjust the plot size
library("repr")
# Default plot size for the examples: 7.5 wide by 7.5 high
options(repr.plot.height = 7.5, repr.plot.width = 7.5)
# Movie data ----
# Draw 1000 random rows from the movies data set. The sample replaces `movies`
# itself and is also stored as `df`, so both names refer to the same 1000 rows.
library("ggplot2movies")
movies <- movies[sample(nrow(movies), size = 1000), ]
df <- movies
# Histogram of movie ratings with custom bar colours, axis labels and a title.
# `pl` is the bare plot (data + x mapping) that the following examples build on.
pl <- ggplot(df, aes(x = rating))
pl +
  geom_histogram(binwidth = 0.1, color = "blue", fill = "red") +
  xlab("Movie Ratings") +
  ylab("Occurrences") +
  ggtitle("Movie Ratings")
# Gradient fill: each bar is filled according to its own bin count.
# after_stat(count) is the current spelling of the deprecated ..count.. notation.
pl2 <- pl +
  geom_histogram(binwidth = 0.1, aes(fill = after_stat(count))) +
  xlab("Movie Ratings") +
  ylab("Occurrences")
# Legend titled "Count"; low counts are dark green, high counts light blue
pl2 + scale_fill_gradient("Count", low = "darkgreen", high = "lightblue")
# Density-scaled histogram with a kernel density curve drawn on top.
# alpha is a fixed setting, so it belongs outside aes(): inside aes() it is
# treated as a data mapping, which adds a meaningless "alpha" legend instead of
# simply making the bars 50% transparent.
# bins = 30 spells out the stat_bin() default rather than relying on it.
pl +
  geom_histogram(aes(y = after_stat(density)), bins = 30, alpha = 0.5) +
  geom_density(color = "blue")
# Read the loan data and convert the listed columns to factors so they can be
# used as discrete groups (e.g. as the fill of a stacked histogram).
# file.path() builds the relative path with a single separator.
loans <- read.csv(file.path("data", "loan_data.csv"))
factor_cols <- c(
  "inq.last.6mths",
  "delinq.2yrs",
  "pub.rec",
  "not.fully.paid",
  "credit.policy"
)
# Convert all of them in one step instead of one assignment per column
loans[factor_cols] <- lapply(loans[factor_cols], as.factor)
# Stacked histogram of fico: 40 bins, bars filled by the not.fully.paid factor,
# outlined in black and half transparent
pl <- ggplot(loans, aes(x = fico)) +
  geom_histogram(
    aes(fill = not.fully.paid),
    bins = 40,
    alpha = 0.5,
    color = "black"
  )
# Pick the two fill colours by hand and switch to the black-and-white theme
pl + scale_fill_manual(values = c("blue", "red")) + theme_bw()
# Scatter plots of wt against mpg from mtcars; `pl` holds the shared base plot
pl <- ggplot(mtcars, aes(x = wt, y = mpg))
# Shade the points by cylinders: colour is mapped to cyl inside aes()
pl + geom_point(aes(colour = cyl))
# Treat cyl as a factor and map it to the point size
pl + geom_point(aes(size = factor(cyl)))
# Colour the points by hp on a blue (low) to red (high) gradient, at a fixed
# point size of 4
pl +
  geom_point(aes(colour = hp), size = 4) +
  scale_colour_gradient(low = "blue", high = "red")
# Show the first three rows of the mpg data set
head(mpg, n = 3)
# Bar charts over the class column; bar heights are counts (or sums of weights)
g <- ggplot(mpg, aes(x = class))
# Number of cars in each class
g + geom_bar()
# Stacked version: each bar is split and filled by drv
g + geom_bar(aes(fill = drv))
# Box plot of mpg for each number of cylinders (cyl is converted to a factor so
# every value gets its own box)
pl <- ggplot(mtcars, aes(x = factor(cyl), y = mpg))
pl + geom_boxplot()
# Same plot with the boxes filled per cyl level and the coordinates flipped
pl +
  geom_boxplot(aes(fill = factor(cyl))) +
  coord_flip()
library('ggplot2movies')
# 2D density contours of rating against year
pl <- ggplot(movies, aes(x = year, y = rating))
pl + geom_density2d()
# Hexagonal binning of the same data (hexbin is loaded for geom_hex), with the
# fill gradient changed to run from blue (low) to red (high)
library("hexbin")
pl +
  geom_hex() +
  scale_fill_gradient(low = "blue", high = "red")
library('plotly')
# Static ggplot first: scatter plot of mpg against wt
pl <- ggplot(mtcars, aes(x = mpg, y = wt)) + geom_point()
pl
# Raise the repr plot size to 25 x 25; sizes above 20 keep the plotly output
# readable
options(repr.plot.height = 25, repr.plot.width = 25)
# Turn the ggplot object into a plotly graph and display it
gpl <- ggplotly(pl)
gpl
# Scatter plot built directly with plot_ly() on simulated data
# Fixed seed so the simulated data is the same on every run
set.seed(123)
x <- rnorm(1000)
y <- rchisq(1000, df = 1, ncp = 0)
group <- sample(LETTERS[1:5], size = 1000, replace = TRUE)
size <- sample(1:5, size = 1000, replace = TRUE)
ds <- data.frame(x, y, group, size)
# Formulas (~column) make plot_ly() read the columns of `ds`; bare names would
# bypass `ds` and pick up the global vectors of the same name instead.
# type = "scatter" states the trace type explicitly rather than leaving plotly
# to infer it. One trace per group, marker size taken from the size column.
p <- plot_ly(
  ds,
  x = ~x,
  y = ~y,
  type = "scatter",
  mode = "markers",
  split = ~group,
  size = ~size
) %>%
  layout(title = "Scatter Plot")
p